*************************************************************************************************
*																								*
*						Flexible Wages, Bargaining, and The Gender Gap							*
*						Barbara Biasi and Heather Sarsons										*
*						Preamble - prepares dataset												*
*																								*
*************************************************************************************************


* Build the district-level agreement file from the Excel table.
* Any extension entry that merely contains "2012" is normalised to exactly
* "2012" so that the column can be converted to numeric.
import excel using agreements_table.xlsx, firstrow clear
replace extension = "2012" if extension != "2012" & regexm(extension, "2012")
destring extension, replace
* keep the district key, expiration, extension and the free-text doubt flag
keep district_code exp ext doubt
rename exp expire
sort district_code
save new_agreements.dta, replace

* Load the teacher panel
use "teachers_panel_va.dta", clear

* Sample window: 2007 through 2016 (rows with missing year are dropped too)
keep if inrange(year, 2007, 2016)

* Manual gender corrections, keyed on teacher id.
* The two lists are disjoint, so the order of the loops does not matter.

* ids recoded to female = 1
local to_female ///
	291 7004 9619 23328 23885 38028 48703 52406 ///
	53495 60888 67738 99370 99584 99918 110705 135406 ///
	139546 141272 148014 153128 163448 167706 169711 187592 ///
	190067 214681 276708 349415 350510 440910 450911 569201 ///
	589981 591594 600186 603442 615815 616873 619092 621810 ///
	629081 631155 632108 633994 634492 645539 654227 656035 ///
	657289 670085 672079 672691 674134 674615 678254 683300 ///
	694851 700081 702742 706395 707221 712327 712519 716603 ///
	723939 726494 730197 734574 734908 745081 746972 751206

* ids recoded to female = 0
local to_male ///
	24716 28812 51266 62644 80899 88308 125443 132477 ///
	133252 134574 143360 152688 171235 176876 189470 191249 ///
	201874 213225 221114 221721 225332 349361 385662 408524 ///
	418417 502199 502204 590252 596384 597070 598086 601346 ///
	605347 609679 615138 616035 621024 622194 631073 631312 ///
	631430 633989 634082 637479 645901 655043 659011 659913 ///
	665354 673245 676825 678445 681597 681964 687810 687814 ///
	693355 693749 694217 696506 696730 697702 700182 710037 ///
	710549 711770 711877 718497 718635 719877 719892 721801 ///
	723295 726039 727135 727996 733732 735008 743453 750654

foreach teacher of local to_female {
	replace female = 1 if id == `teacher'
}
foreach teacher of local to_male {
	replace female = 0 if id == `teacher'
}

* Replace the panel's agreement dates with the cleaned district-level ones.
* Districts that appear only in the agreement file are not kept.
drop expire extension
sort district_code
merge m:1 district_code using new_agreements.dta, keep(master match) nogenerate

* The recode below is disabled: it would set expire = 2011 even for districts
* whose first handbook is from 2012.

/*
replace expire = 2011 if doubt == "possibly 2011"
replace extension = 0 if doubt == "possibly 2011"

replace expire = 2011 if doubt == "possibly exp = 2011, no ext"
replace extension = 0 if doubt == "possibly exp = 2011, no ext"
replace expire = 2013 if doubt == "possibly exp = 2013, no ext"
replace extension = 0 if doubt == "possibly exp = 2013, no ext"
*/

drop doubt
* Extension keeps the raw value; extension falls back to the expiration year
* whenever the raw value is missing or zero
gen Extension = extension
replace extension = expire if extension == . | extension == 0

*--------->  Variables we need

* natural log of salary_n
gen logsalary = ln(salary_n)

* 1 in years strictly after the expiration year, 0 otherwise;
* missing when the expiration year is missing
gen postexp = (year > expire) if expire != .

* same construction relative to the extension year
gen postext = (year > ext) if ext != .

* 1 if master == 1 or phd == 1
gen masterup = (master == 1) | (phd == 1)


* controls: grade
* Numeric versions of the highest / lowest grade taught, and school-level
* dummies built from the range of grades a teacher covers.

/*

gen kinder 	= highgrade == "K3" | highgrade == "K4" | highgrade == "KG" | highgrade == "PK"
gen elem 	= highgrade == "01" | highgrade == "02" | highgrade == "03" | highgrade == "04" | highgrade == "05" | highgrade == "06"
gen middle 	= highgrade == "07" | highgrade == "08"
gen high 	= highgrade == "09" | highgrade == "10" | highgrade == "11" | highgrade == "12"
*/

gen Hgrade = highgrade
gen Lgrade = lowgrade

* map the non-numeric grade codes to negative numbers, then convert to numeric
foreach var in Hgrade Lgrade {
	replace `var' = "-1" if `var' == "KG"
	replace `var' = "-2" if `var' == "PK"
	replace `var' = "-3" if `var' == "K3"
	replace `var' = "-4" if `var' == "K4"
	destring `var', replace
}

* Stata treats a missing value as larger than any number, so a bare
* "Hgrade >= k" is true when Hgrade is missing. Every lower-bound test on
* Hgrade is therefore guarded, otherwise teachers with an unknown highest
* grade would be coded as high-school teachers and escape the drop below.
* (The upper-bound tests on Lgrade are already false when Lgrade is missing.)
gen prek = Lgrade <= -2
gen kinder =  Lgrade <= -1 & Hgrade >= -1 & !missing(Hgrade)
gen elem = Lgrade <= 5 & Hgrade >= 1 & !missing(Hgrade)
gen middle = Lgrade <= 8 & Hgrade >= 6 & !missing(Hgrade)
gen high = Hgrade >= 9 & !missing(Hgrade)

* drop teachers who cannot be placed in any of kindergarten through high school
drop if kinder == 0 & elem == 0 & middle == 0 & high == 0



* Above-median value-added dummy: 1 if va_s is above the sample median,
* 0 if at or below it, missing when va_s is missing
quietly summarize va_s, detail
gen highva = (va_s > r(p50)) if va_s != .

* absorbed controls
* Factor-variable lists stored as global macros for use in later estimation
* commands. The macros are assigned by copying the quoted string rather than
* through "= expression": expression evaluation is subject to the string
* length limit of older Stata releases, which would silently cut off the
* longer lists.
* NOTE(review): exp_post refers to a variable named post; only postexp and
* postext are generated in this section, so confirm post exists before using it.
global exp_post "i.district_code i.district_code#i.post i.totalexp i.totalexp#i.post i.master i.master#i.post i.phd i.phd#i.post i.high i.high#i.post i.math i.math#i.post i.year"
global exp "i.district_code i.district_code#i.postexp i.totalexp i.totalexp#i.postexp i.master i.master#i.postexp i.phd i.phd#i.postexp i.middle i.middle#i.postexp i.high i.high#i.postexp i.math i.math#i.postexp i.year i.year##i.expire"

* same controls, additionally interacted with pp
global exp_pp "i.district_code i.district_code#i.postexp i.totalexp i.pp#i.totalexp i.totalexp#i.postexp i.pp#i.totalexp#i.postexp i.masterup i.pp#i.masterup i.masterup#i.postexp i.pp#i.masterup#i.postexp  i.phd i.phd#i.postexp  i.phd#i.pp i.phd#i.postexp#i.pp i.middle i.middle#i.postexp i.middle#i.pp i.middle#i.postexp#i.pp i.high i.pp#i.high i.high#i.postexp i.pp#i.high#i.postexp i.math i.math#i.pp i.math#i.postexp i.math#i.postexp#i.pp i.year i.pp#i.year i.year#i.expire i.pp#i.year#i.expire"

*--> time variables

* Years relative to the expiration year, and one female-by-relative-year
* dummy per distinct value of time. Dummy number k is tied to relative year
* k - 7, so the set runs from -6 up to (number of distinct values) - 7.
* NOTE(review): this lines up with the data only if the smallest observed
* value of time is -6 and the values are consecutive; confirm.
gen time = year - expire
quietly tabulate time
local nrel = r(r)
forvalues k = 1/`nrel' {
	local relyr = `k' - 7
	* missing wherever time itself is missing
	gen fem_`k' = female * (time == `relyr') if time != .
	label var fem_`k' "`relyr'"
}

* generate trick variables for table labeling
* (constant-zero variables that exist only to carry the labels below)
foreach v in Ed Edexp Edext Yr {
	gen `v' = 0
}

label var Ed "District, Educ, Exper"
label var Edexp "District, Educ, Exper * post-expir"
label var Edext "District, Educ, Exper * post-extens"
label var Yr "Year"

* lists of placeholders for the expiration and extension specifications
global tag_exp "Ed Edexp Yr"
global tag_ext "Ed Edext Yr"


*keep if va_s != . 

* years relative to the extension year
gen timex = year - ext


* constant zero with label "0"
gen zero = 0
label var zero "0"

* One dummy (D_k) and one female interaction (femx_k) per distinct value of
* timex. Index k is tied to relative year k - 10; both are missing wherever
* timex is missing. The two families are created in separate passes so that
* all D_* variables precede all femx_* variables in the dataset.
* NOTE(review): the offset assumes the smallest observed timex is -9 and that
* the values are consecutive; confirm.
quietly tabulate timex
local nrelx = r(r)
forvalues k = 1/`nrelx' {
	local relyr = `k' - 10
	gen D_`k' = (timex == `relyr') if timex != .
	label var D_`k' "`relyr'"
}
forvalues k = 1/`nrelx' {
	local relyr = `k' - 10
	gen femx_`k' = female * (timex == `relyr') if timex != .
	label var femx_`k' "`relyr'"
}
* consecutive integer index of the distinct timex values
egen Timex = group(timex)

* assign teacher w/2011 district and school
* Each teacher gets the district (school) observed in 2011. When there is no
* 2011 record, the value is taken from the closest earlier year (2010 back to
* 2007), and failing that from later years in increasing order (2012 to 2016).
capture rename school_code schoolcode

foreach unit in district school {
	* source identifier for this pass
	if "`unit'" == "district" {
		local src district_code
	}
	else {
		local src schoolcode
	}

	* value in 2011, spread over all of the teacher's rows
	tempvar at2011
	gen `at2011' = `src' if year == 2011
	bysort id: egen `unit'2011 = max(`at2011')

	* fallbacks, only filling teachers that are still missing
	foreach yr of numlist 2010(-1)2007 2012(1)2016 {
		tempvar inyear spread
		gen `inyear' = `src' if year == `yr'
		bysort id: egen `spread' = max(`inyear')
		replace `unit'2011 = `spread' if `unit'2011 == .
	}
}


* Merge admin info: principals
* Builds a school-by-district-by-year male-principal indicator from
* administrators.dta and attaches it twice:
*   male_princ_year   - matched on the teacher's current school and district
*   male_princ_yr2011 - matched on school2011 and district2011
* NOTE(review): position code 51 is presumably the principal code; confirm
* against the administrators codebook.
preserve
use administrators.dta, clear
keep if year > 2006 & year <= 2016
keep if position == 51 // & year == 2011
* mean of female within each cell, rounded to 0/1 and flipped to a male dummy
collapse male_princ = female, by(schoolcode district_code year)
replace male_princ = round(male_princ)
replace male_princ = 1 - male_princ
sort schoolcode district_code year
save temp.dta, replace
restore

* first pass: current school and district
sort schoolcode district_code year
merge m:1 schoolcode district_code year using temp.dta
* Administrator cells with no teacher in the panel would otherwise be appended
* as extra rows with missing id; drop them, as the agreement, commuting-zone,
* FLFP and enrollment merges in this file do.
drop if _m == 2
drop _m 

* second pass: temporarily swap in the 2011 identifiers so the same file can
* be merged on them, then restore the original names
rename district_code D
rename schoolcode S
rename district2011 district_code
rename school2011 schoolcode
rename male_princ Male_princ
merge m:1 schoolcode district_code year using temp.dta
drop if _m == 2
drop _m 
rename district_code district2011 
rename schoolcode school2011 
rename Male_princ male_princ_year
rename male_princ male_princ_yr2011
rename D district_code 
rename S schoolcode
rm temp.dta


* Merge admin info: superintendents
* Builds a district-by-year male-superintendent indicator from
* administrators.dta and attaches it twice:
*   male_super_year   - matched on the teacher's current district
*   male_super_yr2011 - matched on district2011
* NOTE(review): position code 5 is presumably the superintendent code; confirm
* against the administrators codebook.
preserve
use administrators.dta, clear
keep if year > 2006 & year <= 2016
keep if position == 5 // & year == 2011
* mean of female within each cell, rounded to 0/1 and flipped to a male dummy
collapse male_super = female, by(district_code year)
replace male_super = round(male_super)
replace male_super = 1 - male_super
sort district_code year
save temp.dta, replace
restore

* first pass: current district
sort district_code year
merge m:1 district_code year using temp.dta
* District-years with no teacher in the panel would otherwise be appended as
* extra rows with missing id; drop them, as the agreement, commuting-zone,
* FLFP and enrollment merges in this file do.
drop if _m == 2
drop _m

* second pass: temporarily swap in the 2011 district, then restore the names
rename district_code D
rename district2011 district_code
rename male_super Male_super
merge m:1 district_code year using temp.dta
drop if _m == 2
drop _m 
rename district_code district2011 
rename Male_super male_super_year
rename male_super male_super_yr2011
rename D district_code 
rm temp.dta


* Principal and superintendent gender in 2011 (maleprinc, malesuper):
* the teacher's 2011 value of male_princ_year / male_super_year, copied to
* every row of that teacher.
foreach who in princ super {
	tempvar in2011
	gen `in2011' = male_`who'_year if year == 2011
	bysort id: egen male`who' = max(`in2011')
	drop `in2011'
}

* Pre-period versions (maleprinc_pre, malesuper_pre): the maximum of the same
* indicators over 2010 and 2011, i.e. 1 if the teacher had a male
* principal / superintendent in either year.
* An earlier variant conditioned on year <= extension instead:
*gen P = male_princ if year <= extension
*gen P = male_super if year <= extension
foreach who in princ super {
	tempvar pre
	gen `pre' = male_`who'_year if inrange(year, 2010, 2011)
	bysort id: egen male`who'_pre = max(`pre')
	replace male`who'_pre = round(male`who'_pre)
	drop `pre'
}


* Interaction
* Four mutually exclusive superintendent-by-principal gender cells, based on
* the pre-period indicators. A teacher with either indicator missing is 0 in
* all four.
gen male_super_princ = malesuper_pre == 1 & maleprinc_pre == 1
gen fem_super_princ  = malesuper_pre == 0 & maleprinc_pre == 0
gen fems_malep       = malesuper_pre == 0 & maleprinc_pre == 1
gen males_femp       = malesuper_pre == 1 & maleprinc_pre == 0

* female interacted with the post-expiration and post-extension dummies
gen female_postexp = postexp * female
gen female_postext = postext * female

* FLFP
* Attach the district-to-county/commuting-zone crosswalk, count schools per
* commuting zone, then merge the county-by-year FLFP file.
merge m:1 district_code using district_county_cz.dta
	drop if _m==2
	drop _m
	egen schoolid = group(district_code schoolcode)
	replace schoolid = . if district_code == . | schoolcode == .
	* Number of schools in commuting zone
	* One row per school is flagged and the flags are summed within cz.
	* Rows with missing schoolid form a single by-group of their own; without
	* the explicit guard its first row would be counted as one extra school in
	* whichever commuting zone that row happens to belong to.
	bys schoolid: g n=1 if _n==1 & !missing(schoolid)
	bys cz: egen num_schools_cz = total(n)
	drop n
	
	* schools whose flagged row has all_elem == 0; the remainder are counted as elementary
	bys schoolid: g hs=1 if _n==1 & all_elem==0 & !missing(schoolid)
	bys cz: egen num_hs_cz = total(hs)
	g num_elem_cz = num_schools_cz-num_hs_cz
	
	* Teacher-level maxima of the commuting-zone counts. Despite the pre_
	* prefix these are taken over all of a teacher's rows, not only the
	* pre-expiration ones. (Two pre-expiration means that used to be computed
	* here were dropped immediately without being used and have been removed.)
	bys id: egen pre_cz_hs = max(num_hs_cz)
	by id: egen pre_cz_elem = max(num_elem_cz)
	g pre_cz_schools = pre_cz_elem+pre_cz_hs
	
merge m:1 county_id year using flfp.dta
	drop if _m==2
	drop _m

* Labels used in the output tables
label variable postexp "Post Expiration"
label variable postext "Post Extension"
label variable female "Female"
label variable female_postexp "Female $\times$ Post Expiration"
label variable female_postext "Female $\times$ Post Extension"

* Mean share boys in the years up to and including 2011
* School-level average of (100 - percent_of_group_female) over distinct
* school-year values, merged onto the panel by school and district.
preserve
use enrollment_school.dta, clear
* share of boys, defined only for years through 2011
gen P = 100 - percent_of_group_female if year <= 2011
keep P school_code district_code year
duplicates drop
collapse (mean) shareboy_pre = P, by(school_code district_code)
sort school_code district_code
save temp.dta, replace
restore

* the enrollment file keys on school_code, so rename the panel's identifier
rename schoolcode school_code
sort school_code district_code
merge m:1 school_code district_code using temp.dta, keep(master match) nogenerate
rm temp.dta



rename bachelor ba 


	
